import pandas as pd
import re

# Workbook and sheet that are read, and the workbook that is written.
SOURCE_PATH = r'.\data\网点ERP收入202107(1).xlsx'
SOURCE_SHEET = '累计'
OUTPUT_PATH = r'.\data\hs_end.xlsx'
OUTPUT_SHEET = 'Sheet1'

# Half-open window of sheet rows (positional, header=None) that is scanned.
FIRST_ROW = 6
LAST_ROW = 4940

# Original author's notes: column 0 is the institution code and column 1 the
# institution name; codes starting with 053 / 253 were expected to yield
# 135 outlets (not verified here).
CODE_PREFIXES = ('053', '253')

# Rows whose name ends with one of these suffixes are left out of the result.
EXCLUDED_NAME_SUFFIXES = ('营业所', '部', '机要室', '收订班', '零售班')


def select_branch_rows(table):
    """Return the rows of *table* that pass the code and name filters.

    Args:
        table: DataFrame whose columns are labelled ``0`` and ``1``.

    Returns:
        A DataFrame with columns ``[0, 1]`` containing, with their original
        index labels, only the rows where

        * neither column is missing,
        * the text of column 0 starts with one of ``CODE_PREFIXES``, and
        * the text of column 1 does not end with one of
          ``EXCLUDED_NAME_SUFFIXES``.
    """
    complete = table.dropna(how='any')

    # Cells are converted with str() before matching: a code that Excel stored
    # as a number would otherwise raise TypeError instead of being compared.
    # astype(bool) keeps the mask boolean even when the frame is empty.
    has_prefix = complete[0].map(
        lambda code: str(code).startswith(CODE_PREFIXES)
    ).astype(bool)
    by_code = complete.loc[has_prefix, [0, 1]]

    keeps_name = by_code[1].map(
        lambda name: not str(name).endswith(EXCLUDED_NAME_SUFFIXES)
    ).astype(bool)
    return by_code.loc[keeps_name, :]


def main():
    """Read the source sheet, filter its rows and write the result workbook."""
    sheet = pd.read_excel(SOURCE_PATH, sheet_name=SOURCE_SHEET, header=None)
    candidates = sheet.iloc[FIRST_ROW:LAST_ROW, 0:2]
    # The index is written as well, exactly as DataFrame.to_excel does by default.
    select_branch_rows(candidates).to_excel(OUTPUT_PATH, sheet_name=OUTPUT_SHEET)


if __name__ == '__main__':
    main()